#!/usr/bin/env python
#coding=utf-8

"""
发送请求,支持代理,ua,登陆等信息的cookies能自动处理


代码示例
proxy_conf={'ip':'192.168.31.121','port':27017,'db':'crawler_proxy','collection':'proxy'}
request_util = RequestUtil(proxy_conf)
request_util 是一个单例，能保存自动处理登陆后的cookies等信息
response = request_util.make_request('http://www.baidu.com')
"""


import requests
import random
import warnings
from pymongo import MongoClient


# Pool of desktop/mobile browser User-Agent strings; one is picked at random
# per session (see get_random_ua / RequestUtil.change_ua) to vary the
# crawler's fingerprint.  NOTE(review): entries are dated (IE6-9, Chrome 17-23)
# and some carry stray trailing spaces/quotes from the original copy-paste.
USER_AGENTS = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER) ',
 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)" ',
 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER',
 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E) ',
 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400) ',
 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E) ',
 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE) ',
 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E) ',
 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E) ',
 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1',
 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1',
 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E) ',
 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E) ',
 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E) ',
 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0) ',
 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0) Gecko/20121026 Firefox/16.0',
 'Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5',
 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre',
 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0',
 'Mozilla/5.0 (Windows; U; Windows NT 6.1; zh-CN; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15',
 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0)',
 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
 'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
 ]

def get_random_ua():
    """Return a User-Agent string drawn uniformly at random from USER_AGENTS."""
    return USER_AGENTS[random.randrange(len(USER_AGENTS))]


class Singleton(type):
    """Metaclass that makes every class using it a lazy singleton.

    The first call to the class builds an instance and caches it on the
    class object; all later calls return that same cached instance.
    """

    def __init__(cls, name, bases, namespace):
        super(Singleton, cls).__init__(name, bases, namespace)
        # No instance exists yet; created on first call.
        cls._instance = None

    def __call__(cls, *args, **kwargs):
        instance = cls._instance
        if instance is None:
            instance = super(Singleton, cls).__call__(*args, **kwargs)
            cls._instance = instance
        return instance


class RequestUtil():
    """发送请求的工具方法"""
    __metaclass__ = Singleton

    def __init__(self,proxy_conf={},headers={}):

        self.session = requests.Session()
        if headers:
            self.session.headers = headers
        else:
             self.session.headers['User-Agent'] = random.choice(USER_AGENTS)
        self.proxy_collection = None
        self.all_proxy = []
        self._init_proxy(proxy_conf)  #初始化代理
        self.current_proxy = {}
        if self.all_proxy:
            self.current_proxy = random.choice(self.all_proxy)#当前代理
        


    def set_hreaders(self,headers,is_random_ua=True):
        """
        设置headers,ua ,is_random_ua=True 会自动设置ua
        :param hreaders:
        :return:
        """
        old_ua = self.session.headers.get('User-Agent','')
        self.session.headers = headers
        if not self.session.headers.get('User-Agent'):
            self.session.headers['User-Agent']=old_ua


    def _init_proxy(self,proxy_conf):
        if proxy_conf:
            ip = proxy_conf.get('ip')
            if not ip:
                raise Exception(u"proxy ip is None")
            port = proxy_conf.get('port')
            if not port:
                port = 27017
            client = MongoClient(ip,port)
            db_name = proxy_conf.get('db')
            if not db_name:
                raise Exception(u'db is None')
            collection_name = proxy_conf.get('collection')
            if not collection_name:
                raise Exception(u'collection is None')
            db = client[db_name]
            self.proxy_collection = db[collection_name]
            self.fresh_proxy()


    def fresh_proxy(self):
        """从数据库获得代理

        """
        try:
            for row in self.proxy_collection.find():
                tem_proxy = {}
                tem_proxy['http']="http://"+row['_id']
                self.all_proxy.append(tem_proxy)
                self.current_proxy = random.choice(self.all_proxy)
        except Exception,e:
            print e
            raise Exception(u'get proxy form db faild')


    def change_ua(self,useragent=''):
        """改变ua,如果userageng为空,则随机产生ua"""

        if not useragent :
            self.headers['User-Agent'] = random.choice(USER_AGENTS)
        else:
            self.headers['User-Agent'] = useragent

        self.session.headers = self.headers


    def change_ip(self):
        try:
            self.all_proxy.remove(self.current_proxy)
        except Exception,e:
            warnings.warn('remove ip failed')
        self.current_proxy = random.choice(self.all_proxy) if self.all_proxy else {}


    def make_request(self,url,method="get",data={},timeout=8,isproxy=False):
        """发送请求

        isproxy 是否使用代理,True:直接使用代理，False:默认不使用代理
        """

        if isproxy and self.current_proxy:
            print "change ip"
            self.change_ip()
            print self.current_proxy

        if method=="get":
            print self.current_proxy
            response = self.session.get(url, timeout=timeout,params=data,proxies=self.current_proxy)

        elif method =="post":
            response = self.session.post(url,timeout=timeout,data=data,proxies=self.current_proxy)
        else:
            raise Exception('request method is wrong')
        return response


if __name__=="__main__":
    proxy_config = {
        'ip':u'代理库ip地址',
        'port':27017,
        'db':u'crawler_proxy',
        'collection':u'proxy',
    }
    request_util = RequestUtil(proxy_config)
    while True:
        try:
            response = request_util.make_request('http://xlzd.me')
            if response.status_code==200:
                print request_util.current_proxy
                break
        except Exception,e:
            request_util.change_ip()
            continue


    print response.status_code
    #res = session.get('http://www.p2peye.com/thread-594306-1-1.html')
    #print res.content
    #print response.content